library(plyr)
library(estimatr)
library(emmeans)
library(tidyverse)
library(magrittr)
library(broom)
library(survey)

d1 <- "https://github.com/thomasjwood/full_fact/raw/main/ff_mt.rds" %>% 
  url %>% 
  gzcon  %>% 
  readRDS %>% 
  mutate(
    out_agree = out_agree %>% 
      mapvalues(
        c("Tend to disagree",
          "Tend to agree"),
        c("Disagree",
          "Agree")
      ) %>% 
      factor(
        c("Strongly disagree",
          "Disagree",
          "Neither agree nor disagree",
          "Agree",
          "Strongly agree")
      ),
    out_true = out_true %>% 
      factor(
        c("True",
          "Probably true",
          "Not sure",
          "Probably false",
          "False") %>% 
          rev
      ),
    agree_num = out_agree %>% 
      as.numeric,
    true_num = out_true %>% 
      as.numeric,
    cond = cond %>% 
      factor(
        c("cond_items",
          "cond_misinfo",
          "cond_corr")
      ),
    employ2 = employ %>% 
      mapvalues(
        c("Retired",
          "Full-time parent, homemaker",
          "Self employed", 
          "Student/Pupil",
          "Employed full-time",
          "Employed part-time", 
          "Unemployed and not looking for a job/Long-term sick or disabled", 
          "Unemployed but looking for a job"),
        c("Retired",
          "Unemployed",
          "Employed", 
          "Student",
          "Employed",
          "Employed", 
          "Unemployed", 
          "Unemployed")
      ) %>% 
      factor(
        c("Retired",
          "Employed",
          "Unemployed",
          "Student")
      ),
    ideo2 = ideo %>% 
      mapvalues(
        c("Left1", "1", "2", "3", "4",
          "5", "6",
          "7", "8", "9",  "10", "Right10", 
          
          "DK", "Don't know", "Prefer not to say"),
        c("Left", "Moderate", "Right", "DK") %>% 
          rep(
            c(5, 2, 5, 3)
          )
      ) %>% 
      factor(
        c("Left", "Moderate", "Right", "DK")
      ),
    ideonum = ideo %>% 
      mapvalues(
        c("Left1", "1", "2", "3", "4",
          "5", "6",
          "7", "8", "9",  "10", "Right10", 
          
          "DK", "Don't know", "Prefer not to say"),
        c(1, 1:10, 10, NA, NA, NA)
      ) %>% 
      as.character %>% 
      as.numeric,
    age = 2020 %>% 
      subtract(
        birthyr
      ) %>% 
      cut(
        breaks = c(-Inf, 26, 40, 60, Inf),
        labels = c(
          "18-26", "27-40", "41-60", "61+"
        )
      )
    )

d1$com_num <- d1$agree_num %>% 
  add(
    d1$true_num
  ) %>% 
  divide_by(2)

# count of correction exposures

d1 %<>% 
  left_join(
    d1 %>% 
      filter(wave == "wave 1") %>% 
      select(
        caseid, country, cond
      ) %>% 
      group_by(
        caseid, country
      ) %>% 
      summarize(
        cnt_corr = cond %>% str_detect("_corr") %>% sum,
        cnt_mis  = cond %>% str_detect("_misinfo") %>% sum
      )
    )


# attrition check--gen dummy for 1 wave respondents

d1 %<>% 
  left_join(
    d1 %>% 
      select(
        country, caseid, wave
      ) %>% 
      group_by(
        country, caseid, wave
      ) %>% 
      slice(1) %>% 
      group_by(
        country, caseid
      ) %>% 
      summarize(
        waves_done = wave %>% unique %>% length
      ) %>% 
      mutate(
        attrited = waves_done %>% 
          equals(1) %>% 
          ifelse(0, 1)
      )
  ) %>% 
  select(-waves_done) %>% 
  filter(
    country %>% 
      equals("nigeria") %>% 
      not
    )

t1 <- d1 %>% 
  mutate(
    country = issue %>%
      str_detect(
        "global|saltwater"
      ) %>%
      ifelse(
        "Multi-country",
        country
      )
  ) %>%
  filter(
    wave == "wave 1" &
    country %>% 
      equals("Multi-country") %>% 
      not
    ) %>%
  group_by(country, caseid) %>% 
  slice(1) %>% 
  group_by(
    country
  ) %>% 
  nest 

t1$mis_mods <- t1$data %>% 
  map(
    \(i)
    glm(
      attrited ~  age + employ2 + ideo2 + gender + cnt_corr + cnt_mis,
      data = i,
      family = binomial()
      )
    )

t2 <- t1 %>% 
  pmap_dfr(
    \(country, data, mis_mods)
    data %>% 
      mutate(
        ipw = 1 %>% 
          divide_by(
            mis_mods %>% 
              predict(type = "response")
          ),
        country = country)
  ) %>% 
  select(
    caseid, country, ipw
  )

t3 <- d1 %>% 
  filter(
    wave %>% 
      equals("wave 2")
  ) %>% 
  left_join(t2) %>% 
  mutate(
    country = issue %>%
      str_detect(
        "global|saltwater"
      ) %>%
      ifelse(
        "Multi-country",
        country
      )
    ) %>% 
  group_by(
    country, issue
  ) %>% 
  nest

t3$mods <- t3$data %>% 
  map(
    ~lm_robust(
      com_num ~ cond, 
      data = .x
    )
  )

t3$des <- t3$data %>% 
  map(
    function(i)
      svydesign(
        ~1, 
        data = i,
        weights = i$ipw
        )
    )

t3$s_mods <- t3 %>%
  ungroup %>% 
  pmap(
    function(
      country, issue, data, mods, des
    )
      
      svyglm(
        com_num ~ cond, 
        data = data,
        design = des
      )
  )

t4 <- t3 %>% 
  ungroup %>% 
  select(country, issue, mods, s_mods) %>% 
  pivot_longer(
    names_to = "type", 
    values_to = "mods", 
    cols = ends_with("mods")   
  ) %>% 
  left_join(
    t3 %>% 
      ungroup %>% 
      select(country, issue, data)
  )

t4$emm <- t4$mods %>% 
  map2(
    t4$data,
    ~emmeans(
      .x, 
      consec ~ cond, 
      data = .y, 
      adjust = "none"
    )
  )

t5 <- t4 %>% 
  select(
    country, issue, type, emm
  ) %>% 
  pmap_dfr(
    \(country, issue, type, emm)
    
    
    # emm <- t4$emm[[1]]
    
    emm %>% 
      extract2("contrasts") %>% 
      tidy %>% 
      mutate(
        country = country, 
        issue = issue, 
        type =  type
      )
    ) %>% 
  mutate(
    contrast = contrast %>% 
      str_detect("cond_items") %>% 
      ifelse(
        "Misinformation effect",
        "Correction effect"
      ) %>% 
      factor(
        c("Misinformation effect",
          "Correction effect")
        )
    )

# separately

t6 <- tibble(
  contrast = t5$contrast %>% 
    levels,
  cor = t5 %>% 
    select(
      -c(null.value, std.error:p.value, z.ratio)
    ) %>% 
    spread(type, estimate) %>% 
    group_by(contrast) %>% 
    nest %>% 
    use_series(data) %>% 
    map_dbl(
      ~cor(.$mods, .$s_mods) %>% 
        round(3)
      ) %>% 
    as.character %>% 
    str_sub(2)
)

t5 %>% 
  select(
    -c(null.value, std.error:p.value, z.ratio)
  ) %>% 
  spread(type, estimate) %>% 
  ggplot(
    aes(mods, s_mods)
  ) +
  geom_abline(
    slope = 1,
    intercept = 0,
    size = .3,
    linetype = "dashed"
  ) +
  geom_point() +
  geom_text(
    aes(
      x, y, label = lab
    ),
    data = t6 %>% 
      mutate(
        x = -.4,
        y = .175,
        lab = str_c(
          "Correlation = " %>% 
            c(""),
          cor
        ),
        contrast = contrast %>% 
          factor(
            t5$contrast %>% 
              levels
          )
      ),
    size = 3
  ) +
  facet_wrap(~contrast, nrow = 1) +
  labs(
    x = 'Unweighted models\' estimates',
    y = 'Inverse probability\nweighted models\' estimates'
  ) +
  theme(
    strip.text.x = element_text(
      # size =  11, 
      angle = 0
    ),
    strip.text.y = element_text(
      # size =  11, 
      angle = 0
    ),
    legend.position = "none",
    axis.text.y = element_text(
      # angle = 45/2,
      hjust = .5
      # vjust = 1
    )
  )

